"""
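What is cross-validation?

Classification problem: decision trees.

This script prepares the red wine quality data for that discussion: it
re-encodes the quality labels, shuffles the rows, and makes a simple
hold-out split into training and testing/validation sets.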
"""
import pandas as pd

def encode_quality(df: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    """Return a copy of *df* whose ``quality`` column is re-encoded.

    Args:
        df: Frame containing a ``quality`` column.
        mapping: Dictionary from original quality values to new labels.

    Returns:
        A new DataFrame; the input frame is left untouched. Values that
        are missing from *mapping* become NaN (pandas ``Series.map``
        semantics).
    """
    # pandas' map accepts any dictionary and converts each value in the
    # column to the corresponding dictionary value.
    return df.assign(quality=df["quality"].map(mapping))


def shuffle_rows(df: pd.DataFrame, random_state=None) -> pd.DataFrame:
    """Return the rows of *df* in random order with a fresh 0..n-1 index.

    Args:
        df: Frame to shuffle.
        random_state: Optional seed forwarded to ``DataFrame.sample``.
            The default ``None`` gives a different order on every run.
    """
    # sample with frac=1 draws every row exactly once, i.e. a shuffle;
    # the indices are reset because they are permuted by the shuffle.
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)


def split_train_test(df: pd.DataFrame, n_train: int = 1000):
    """Split *df* into its first *n_train* rows and all remaining rows.

    The second part is the exact complement of the first, so the two
    never overlap and no row is dropped, whatever the length of *df*.

    Args:
        df: Frame to split (shuffle it first for a random split).
        n_train: Number of leading rows used for training.

    Returns:
        A ``(train, test)`` tuple of DataFrames.

    Raises:
        ValueError: If *n_train* is negative.
    """
    if n_train < 0:
        raise ValueError(f"n_train must be non-negative, got {n_train}")
    return df.iloc[:n_train], df.iloc[n_train:]


if __name__ == '__main__':
    df = pd.read_csv('../test_cases/winequality-red.csv', sep=';')
    print(df.head())

    # a mapping dictionary that maps the quality values 3..8
    # onto the labels 0..5
    quality_mapping = {
        3: 0,
        4: 1,
        5: 2,
        6: 3,
        7: 4,
        8: 5,
    }
    df = encode_quality(df, quality_mapping)

    # shuffle the dataframe before splitting
    df = shuffle_rows(df)

    # top 1000 rows are selected for training; every remaining row
    # (599 for the standard 1599-row file) is used for
    # testing/validation
    df_train, df_test = split_train_test(df, n_train=1000)
